# 02_basic_preference_summary.R
# Produces basic summaries of patterns in preferences 
# Produces:
# - Figure S1: How often each category is prioritized, evenly weighing 
#   all housing authorities vs weighing by number of vouchers administered

library(tidyverse)
library(ggplot2)
library(xtable)
library(ggthemes)
library(here)

options(scipen=999)

# Load data on PHAs in sample
pha_pref_ref <- readRDS(here("data", "pha_pref_ref.RDS"))
# Load data on PHA characteristics
pha_all <-  readRDS(here("data", "pha_all.RDS"))
# Load coded preference data
codes <- readRDS(here("data", "codes_expanded.RDS"))

# Subset to only PHAs in sample
pha_sample <- pha_pref_ref %>%
  filter(in_sample_abt == TRUE) %>%
  left_join(pha_all, by = "pha_code")

#########################################
## Summarize prevalence of preferences ##
#########################################

# Get proportion with amy preferences
prop.table(table(pha_sample$any_preferences))

# Create binary indicator of use of each preference category
# by PHA 
pref_use_summary_coded <-  codes %>%
  filter(any_preferences == 1) %>%
  dplyr::select(pha_code, unit_single:special_defs) %>%
  group_by(pha_code) %>%
  summarize_all(list(max))

# Append rows for housing authorities with no preferences
# so they count toward the denominator
no_prefs <- pha_sample %>%
  filter(any_preferences == 0) %>%
  select(pha_code) 

pref_use_summary <- pref_use_summary_coded %>%
  # Add on rows for no preference, where all the category values are NA
  bind_rows(no_prefs) %>%
  # replace all NAs with 0 
  mutate_all(~replace(., is.na(.), 0))

# Read in table with preference descriptions 
pref_descrip <- read_csv(here("data", "pref_summary_final_122022.csv"))

# Summarize prevalence, given a set of weights. Defaults to 
# weighting all PHAs evenly
summarize_prev <- function(pref_use_df, weights = NULL) {
  if (is.null(weights)) {
    weights = rep(1, nrow(pref_use_df))
  }
  pref_prevalence <- pref_use_df %>%
    gather(key = "Category", value = "Count", -pha_code) %>%
    group_by(Category) %>%
    summarize(Prevalence = weighted.mean(Count, weights)) %>%
    mutate(Category = gsub("_", " ", Category)) %>%
    arrange(desc(Prevalence))
  
  return(pref_prevalence)
}

# Summarize preference prevalence in overall sample
# weighing all housing authorities equally
prev_summary_overall <- summarize_prev(pref_use_summary)

# Summarize preference prevalence weighted by number of 
# vouchers administered
pref_use_summary_vms <- pha_sample %>%
  select(pha_code, hud_med_month_vouchers) %>%
  left_join(pref_use_summary, by = "pha_code") 

prev_summary_vms <- summarize_prev(select(pref_use_summary_vms, 
                                          -hud_med_month_vouchers),
                                   pref_use_summary_vms$hud_med_month_vouchers)

# Create plot comparing prevalence with housing authority weighting and 
# # of voucher-based weighting 
prev_summary_wgt_comp <- prev_summary_vms %>%
  rename(`By # vouchers` = Prevalence) %>%
  # join on prevalence based on PHAs all being weighted equally
  left_join(prev_summary_overall, by = "Category") %>%
  rename(Equal = Prevalence) %>%
  left_join(select(pref_descrip,
                   category, label), by = c("Category" = "category")) %>%
  gather(key = "weighting", value = "Prevalence", -Category, -label) 

# Plot in order of decreasing prevalence 
set_plot_order <- prev_summary_wgt_comp %>%
  filter(weighting == "Equal") %>%
  arrange(Prevalence) %>% 
  mutate(label = factor(label))

prev_summary_wgt_comp %>%
  mutate(label = factor(label, levels = set_plot_order$label, 
                        ordered = TRUE)) %>%
  ggplot(aes(x = label, 
             y = Prevalence,
           fill = factor(weighting))) +
  geom_bar(stat = "identity", 
           color = "black", alpha = 0.5,
           width = 0.6,
           position = "dodge")  +
  ylab("Prevalence") +
  xlab("Category") +
  scale_fill_brewer(palette = "Dark2") +
  theme(axis.text.x = element_text(angle = 90),
        legend.position = c(0.75, 0.1)) +
  coord_flip() +
  labs(fill = "PHA Weights") +
  theme_minimal() 

ggsave(here("figs", "supp_prevalence_vms.png"), 
       plot = last_plot(),
       device = "png", width = 10, height = 9,
       dpi = 300)
